#!/bin/bash
#
# CP2K (Intel/MKL x86_64) arch file for the Alps/Eiger (AMD Zen 2) cluster at CSCS
#
# Tested with: Intel 2021.10.0 (MPI/MKL) on the Alps/Eiger (CSCS)
#
# Usage: Source this arch file and then run make as instructed.
#        Replace or adapt the "module load" commands below if needed.
#
# Last update: 21.02.2025
#
# Bash part (toolchain bootstrap). It is meant to be sourced by bash from the
# CP2K root directory: it loads the modules, rebuilds the toolchain in
# tools/toolchain, sources the resulting setup file and prints the make
# command lines for building CP2K.
# For make, the trailing backslashes below extend the comment started on the
# next line up to and including "return", so make skips this part entirely.
# Every line of the bash part must therefore end with a backslash; do not
# insert comment lines between them.
# The script stops early if tools/toolchain cannot be entered, because the
# following "rm -rf build" and "cd ../.." would otherwise act on the
# directory the file was sourced from.
# \
   if [[ "${0}" == "${BASH_SOURCE}" ]]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if ! cd tools/toolchain; then \
      echo "ERROR: Cannot change to tools/toolchain, source this file from the CP2K root directory"; \
      return 1; \
   fi; \
   rm -rf build; \
   module purge; \
   module load cray; \
   module swap PrgEnv-cray PrgEnv-intel; \
   module load cray-fftw; \
   module load cray-python; \
   module list; \
   [[ -z "${TARGET_CPU}" ]] && export TARGET_CPU="core-avx2"; \
   ./install_cp2k_toolchain.sh -j32 --enable-cray --no-arch-files --target-cpu=${TARGET_CPU} --with-dftd4 --with-elpa --with-fftw --with-libxsmm --with-intel --with-mkl --with-libtorch=no --with-libvdwxc --with-plumed; \
   source ./install/setup; \
   cd ../..; \
   echo; \
   echo "Check the output above for error messages and consistency!"; \
   echo; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j32 ARCH=${this_file%.*} VERSION=${this_file##*.} TARGET_CPU=${TARGET_CPU}"; \
   echo; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j32 ARCH=${this_file%.*} VERSION=${this_file##*.} TARGET_CPU=${TARGET_CPU} DO_CHECKS=yes"; \
   echo; \
   return

# Set options
# These are defaults: assignments given on the make command line
# (e.g. DO_CHECKS=yes or TARGET_CPU=...) take precedence over them.
#   DO_CHECKS  - "yes" adds -D__CHECK_DIAG (regression testing)
#   SHARED     - "yes" links shared libraries with rpath entries,
#                any other value links the static archives
#   TARGET_CPU - value handed to the compiler via -mtune
DO_CHECKS      := no
SHARED         := no
TARGET_CPU     := core-avx2

# Retrieve package versions
# Each USE_<package> is copied from the matching <package>_VER variable.
# An empty value leaves the corresponding package block below inactive.
# NOTE(review): the *_VER variables are presumably exported by
# tools/toolchain/install/setup, which the bash part sources - confirm.
USE_COSMA      := $(COSMA_VER)
USE_DFTD4      := $(DFTD4_VER)
USE_ELPA       := $(ELPA_VER)
USE_HDF5       := $(HDF5_VER)
USE_LIBGRPP    := $(LIBGRPP_VER)
USE_LIBINT     := $(LIBINT_VER)
# libtorch stays disabled (the toolchain is invoked with --with-libtorch=no)
#USE_LIBTORCH   := $(LIBTORCH_VER)
USE_LIBVORI    := $(LIBVORI_VER)
USE_LIBXC      := $(LIBXC_VER)
USE_LIBXSMM    := $(LIBXSMM_VER)
USE_PLUMED     := $(PLUMED_VER)
USE_SIRIUS     := $(SIRIUS_VER)
USE_SPFFT      := $(SPFFT_VER)
USE_SPGLIB     := $(SPGLIB_VER)
USE_SPLA       := $(SPLA_VER)

# LMAX selects the libint installation directory (libint-...-lmax-$(LMAX)),
# MAX_CONTR is handed to the code as -D__MAX_CONTR
LMAX           := 5
MAX_CONTR      := 4

# Compiler, linker and archiver commands
# NOTE(review): cc and ftn are presumably the compiler wrappers provided by
# the PrgEnv-intel module loaded in the bash part - confirm.
CC             := cc
FC             := ftn
LD             := ftn
AR             := ar -r

# Base compiler flags; they are reused for C++ and Fortran further below.
# The special compile rules at the end of this file rely on -O2 being present.
CFLAGS         := -O2 -fPIC -fp-model precise -funroll-loops -g -mtune=$(TARGET_CPU) -qopenmp -qopenmp-simd -traceback -diag-disable=10448

# Preprocessor defines which are always set; the package blocks append more
DFLAGS         := -D__parallel -D__MKL -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Root directories of the toolchain installation and of the MKL libraries
INSTALL_PATH   := $(PWD)/tools/toolchain/install
MKL_LIB        := $(MKLROOT)/lib/intel64

# Initial linker flags: a shared build gets rpath entries for the CP2K and
# DBCSR library directories, any other build links the Intel runtime statically
ifeq ($(SHARED), yes)
   CP2K_LIB       := $(PWD)/lib/$(ARCH)/$(ONEVERSION)
   LD_SHARED      := $(FC) -shared
   CFLAGS         += -fPIC
   LDFLAGS        := -Wl,--enable-new-dtags -Wl,-rpath=$(CP2K_LIB) -Wl,-rpath=$(CP2K_LIB)/exts/dbcsr
else
   LDFLAGS        := -static-intel
endif

# Settings for regression testing
# DO_CHECKS=yes (usually given on the make command line) adds the
# __CHECK_DIAG define to the preprocessor flags
ifeq ($(DO_CHECKS), yes)
   DFLAGS         += -D__CHECK_DIAG
endif

# PLUMED: active when PLUMED_VER is set. It also requests GSL by setting
# USE_GSL, which is evaluated by the GSL block further below.
ifneq ($(USE_PLUMED),)
   USE_PLUMED     := $(strip $(USE_PLUMED))
   USE_GSL        := $(GSL_VER)
   PLUMED_LIB     := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   DFLAGS         += -D__PLUMED2
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(PLUMED_LIB) -L$(PLUMED_LIB) -lplumed -lplumedKernel
   else
      LIBS           += $(PLUMED_LIB)/libplumed.a
   endif
endif

# ELPA (elpa_openmp variant): active when ELPA_VER is set. Both the elpa and
# the modules subdirectory of the versioned include directory are searched.
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/cpu/include/elpa_openmp-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/cpu/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(ELPA_LIB) -L$(ELPA_LIB) -lelpa_openmp
   else
      LIBS           += $(ELPA_LIB)/libelpa_openmp.a
   endif
endif

# libvori: active when LIBVORI_VER is set; only a define and the library are
# added, no include directory
ifneq ($(USE_LIBVORI),)
   USE_LIBVORI    := $(strip $(USE_LIBVORI))
   LIBVORI_LIB    := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   DFLAGS         += -D__LIBVORI
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(LIBVORI_LIB) -L$(LIBVORI_LIB) -lvori
   else
      LIBS           += $(LIBVORI_LIB)/libvori.a
   endif
endif

# libxc: active when LIBXC_VER is set; libxcf03 is linked ahead of libxc
ifneq ($(USE_LIBXC),)
   USE_LIBXC      := $(strip $(USE_LIBXC))
   LIBXC_LIB      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   LIBXC_INC      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   DFLAGS         += -D__LIBXC
   CFLAGS         += -I$(LIBXC_INC)
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(LIBXC_LIB) -L$(LIBXC_LIB) -lxcf03 -lxc
   else
      LIBS           += $(LIBXC_LIB)/libxcf03.a $(LIBXC_LIB)/libxc.a
   endif
endif

# DFTD4: active when DFTD4_VER is set.
# The include directory carries a compiler specific suffix (Intel-*), so it
# is located with a wildcard. addprefix emits one -I per match and nothing at
# all without a match, which avoids a dangling "-I" that would swallow the
# next compiler flag.
ifneq ($(USE_DFTD4),)
   USE_DFTD4      := $(strip $(USE_DFTD4))
   DFTD4_INC      := $(INSTALL_PATH)/dftd4-$(USE_DFTD4)/include
   DFTD4_LIB      := $(INSTALL_PATH)/dftd4-$(USE_DFTD4)/lib
   CFLAGS         += $(addprefix -I,$(wildcard $(DFTD4_INC)/dftd4/Intel-*))
   DFLAGS         += -D__DFTD4
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(DFTD4_LIB) -L$(DFTD4_LIB) -ldftd4 -lmstore -lmulticharge -lmctc-lib
   else
      LIBS           += $(DFTD4_LIB)/libdftd4.a
      LIBS           += $(DFTD4_LIB)/libmstore.a
      LIBS           += $(DFTD4_LIB)/libmulticharge.a
      LIBS           += $(DFTD4_LIB)/libmctc-lib.a
   endif
endif

# libgrpp: active when LIBGRPP_VER is set; only the define is added here,
# no include path or library
ifneq ($(USE_LIBGRPP),)
   USE_LIBGRPP    := $(strip $(USE_LIBGRPP))
   DFLAGS         += -D__LIBGRPP
endif

# libint: active when LIBINT_VER is set; the installation directory name
# contains both the libint version and the LMAX value set above
ifneq ($(USE_LIBINT),)
   USE_LIBINT     := $(strip $(USE_LIBINT))
   LMAX           := $(strip $(LMAX))
   LIBINT_INC     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   LIBINT_LIB     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   CFLAGS         += -I$(LIBINT_INC)
   DFLAGS         += -D__LIBINT
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(LIBINT_LIB) -L$(LIBINT_LIB) -lint2
   else
      LIBS           += $(LIBINT_LIB)/libint2.a
   endif
endif

# spglib: active when SPGLIB_VER is set; the library is named symspg
ifneq ($(USE_SPGLIB),)
   USE_SPGLIB     := $(strip $(USE_SPGLIB))
   SPGLIB_INC     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   SPGLIB_LIB     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   CFLAGS         += -I$(SPGLIB_INC)
   DFLAGS         += -D__SPGLIB
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(SPGLIB_LIB) -L$(SPGLIB_LIB) -lsymspg
   else
      LIBS           += $(SPGLIB_LIB)/libsymspg.a
   endif
endif

# libxsmm: active when LIBXSMM_VER is set; link order is xsmmf, xsmmext, xsmm
ifneq ($(USE_LIBXSMM),)
   USE_LIBXSMM    := $(strip $(USE_LIBXSMM))
   LIBXSMM_LIB    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   LIBXSMM_INC    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   DFLAGS         += -D__LIBXSMM
   CFLAGS         += -I$(LIBXSMM_INC)
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(LIBXSMM_LIB) -L$(LIBXSMM_LIB) -lxsmmf -lxsmmext -lxsmm
   else
      LIBS           += $(LIBXSMM_LIB)/libxsmmf.a $(LIBXSMM_LIB)/libxsmmext.a $(LIBXSMM_LIB)/libxsmm.a
   endif
endif

# SIRIUS: active when SIRIUS_VER is set. It is linked together with pugixml
# and libvdwxc and requests GSL by setting USE_GSL, which is evaluated by the
# GSL block further below.
ifneq ($(USE_SIRIUS),)
   USE_SIRIUS     := $(strip $(USE_SIRIUS))
   USE_GSL        := $(GSL_VER)
   SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/sirius
   SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
   PUGIXML_LIB    := $(INSTALL_PATH)/pugixml-$(PUGIXML_VER)/lib
   LIBVDWXC_INC   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   CFLAGS         += -I$(LIBVDWXC_INC) -I$(SIRIUS_INC)
   DFLAGS         += -D__LIBVDWXC -D__SIRIUS
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(SIRIUS_LIB) -L$(SIRIUS_LIB) -lsirius -lsirius_cxx
      LIBS           += -Wl,-rpath=$(PUGIXML_LIB) -L$(PUGIXML_LIB) -lpugixml
      LIBS           += -Wl,-rpath=$(LIBVDWXC_LIB) -L$(LIBVDWXC_LIB) -lvdwxc
   else
      LIBS           += $(SIRIUS_LIB)/libsirius.a $(SIRIUS_LIB)/libsirius_cxx.a
      LIBS           += $(PUGIXML_LIB)/libpugixml.a $(LIBVDWXC_LIB)/libvdwxc.a
   endif
endif

# SpFFT: active when SPFFT_VER is set
ifneq ($(USE_SPFFT),)
   USE_SPFFT      := $(strip $(USE_SPFFT))
   SPFFT_INC      := $(INSTALL_PATH)/SpFFT-$(USE_SPFFT)/include
   SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(USE_SPFFT)/lib
   CFLAGS         += -I$(SPFFT_INC)
   DFLAGS         += -D__SPFFT
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(SPFFT_LIB) -L$(SPFFT_LIB) -lspfft
   else
      LIBS           += $(SPFFT_LIB)/libspfft.a
   endif
endif

# SpLA: active when SPLA_VER is set; the include path points at the spla
# subdirectory of the installation's include directory
ifneq ($(USE_SPLA),)
   USE_SPLA       := $(strip $(USE_SPLA))
   SPLA_INC       := $(INSTALL_PATH)/SpLA-$(USE_SPLA)/include/spla
   SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(USE_SPLA)/lib
   CFLAGS         += -I$(SPLA_INC)
   DFLAGS         += -D__SPLA
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(SPLA_LIB) -L$(SPLA_LIB) -lspla
   else
      LIBS           += $(SPLA_LIB)/libspla.a
   endif
endif

# HDF5: active when HDF5_VER is set. The static build additionally links
# libhdf5_f90cstub between the Fortran and the C library.
# The rpath option uses the same -Wl,-rpath=<dir> spelling as all other
# package blocks of this file.
ifneq ($(USE_HDF5),)
   USE_HDF5       := $(strip $(USE_HDF5))
   HDF5_INC       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/include
   HDF5_LIB       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/lib
   CFLAGS         += -I$(HDF5_INC)
   DFLAGS         += -D__HDF5
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(HDF5_LIB) -L$(HDF5_LIB) -lhdf5_fortran -lhdf5
   else
      LIBS           += $(HDF5_LIB)/libhdf5_fortran.a
      LIBS           += $(HDF5_LIB)/libhdf5_f90cstub.a
      LIBS           += $(HDF5_LIB)/libhdf5.a
   endif
endif

# COSMA: active when COSMA_VER is set; link order is cosma_prefixed_pxgemm,
# cosma, costa
ifneq ($(USE_COSMA),)
   USE_COSMA      := $(strip $(USE_COSMA))
   COSMA_LIB      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   COSMA_INC      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   DFLAGS         += -D__COSMA
   CFLAGS         += -I$(COSMA_INC)
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(COSMA_LIB) -L$(COSMA_LIB) -lcosma_prefixed_pxgemm -lcosma -lcosta
   else
      LIBS           += $(COSMA_LIB)/libcosma_prefixed_pxgemm.a $(COSMA_LIB)/libcosma.a $(COSMA_LIB)/libcosta.a
   endif
endif

# GSL: USE_GSL is not set in the package version list at the top, only by
# the PLUMED and SIRIUS blocks above, so GSL is linked only if one of them
# is active and GSL_VER is set
ifneq ($(USE_GSL),)
   USE_GSL        := $(strip $(USE_GSL))
   GSL_INC        := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   GSL_LIB        := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   CFLAGS         += -I$(GSL_INC)
   DFLAGS         += -D__GSL
   ifeq ($(SHARED), yes)
      LIBS           += -Wl,-rpath=$(GSL_LIB) -L$(GSL_LIB) -lgsl
   else
      LIBS           += $(GSL_LIB)/libgsl.a
   endif
endif

# MKL: ScaLAPACK first, then the interface, sequential, core and BLACS
# (Intel MPI) libraries wrapped in a linker group
ifeq ($(SHARED), yes)
   LIBS           += -Wl,-rpath=$(MKL_LIB) -L$(MKL_LIB) -lmkl_scalapack_lp64
   LIBS           += -Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -Wl,--end-group
else
   LIBS           += $(MKL_LIB)/libmkl_scalapack_lp64.a
   LIBS           += -Wl,--start-group $(MKL_LIB)/libmkl_intel_lp64.a $(MKL_LIB)/libmkl_sequential.a $(MKL_LIB)/libmkl_core.a $(MKL_LIB)/libmkl_blacs_intelmpi_lp64.a -Wl,--end-group
endif

# libtorch: only used when USE_LIBTORCH is set and the number after the last
# dot on the first line of "ldd --version" (presumably the glibc minor
# version) is at least 27. It is always linked as a shared library.
# USE_LIBTORCH is tested first so that the shell pipeline is not spawned on
# every parse of this file while libtorch is disabled.
ifneq ($(USE_LIBTORCH),)
   ifeq ($(shell [ $(shell ldd --version | head -n 1 | tr -s '.' '\n' | tail -n 1) -ge 27 ] && echo yes), yes)
      USE_LIBTORCH   := $(strip $(USE_LIBTORCH))
      LIBTORCH_INC   := $(INSTALL_PATH)/libtorch-$(USE_LIBTORCH)/include
      LIBTORCH_LIB   := $(INSTALL_PATH)/libtorch-$(USE_LIBTORCH)/lib
      CFLAGS         += -I$(LIBTORCH_INC)
      DFLAGS         += -D__LIBTORCH
      LIBS           += -Wl,-rpath=$(LIBTORCH_LIB) -L$(LIBTORCH_LIB) -lc10 -ltorch_cpu -ltorch
   endif
endif

# Final flag assembly: all defines collected above and the MKL include
# directories (including its FFTW headers) are appended to the C flags
CFLAGS         += $(DFLAGS) -I$(MKLROOT)/include -I$(MKLROOT)/include/fftw

# C++ and Fortran start from the complete C flags
CXXFLAGS       := $(CFLAGS) -std=c++14

FCFLAGS        := $(CFLAGS) -diag-disable=8291 -diag-disable=8293 -fpp -fpscomp logicals -free
#FCFLAGS        += -std08

# The linker receives the Fortran flags in addition to the flags set above
LDFLAGS        += $(FCFLAGS)
LDFLAGS_C      := -nofor-main

# System libraries are linked last
LIBS           += -lz -ldl -lstdc++

# Files which have to be compiled with a reduced optimisation level.
# The substitution is anchored to the option "-O2": replacing the bare text
# "O2" would also rewrite include paths in FCFLAGS that happen to contain it
# (these paths are derived from the working directory).

# Required due to memory leak that occurs if high optimisations are used
mp2_optimize_ri_basis.o: mp2_optimize_ri_basis.F
	$(FC) -c $(subst -O2,-O0,$(FCFLAGS)) $<
# Required due to SEGFAULTS occurring for higher optimisation levels
paw_basis_types.o: paw_basis_types.F
	$(FC) -c $(subst -O2,-O1,$(FCFLAGS)) $<
# Reduce compilation time
hfx_contraction_methods.o: hfx_contraction_methods.F
	$(FC) -c $(subst -O2,-O1,$(FCFLAGS)) $<

# End
